// Parameters:
//
//   sampler2D BumpSampler;
//   sampler2D SpecSampler;
//   sampler2D StippleTexture;
//   sampler2D TextureSampler;
//   float bumpiness;
//   float4 globalScalars;
//   float3 specMapIntMask;
//   float specularColorFactor;
//   float specularFactor;
//   float4 stencil;
//
//
// Registers:
//
//   Name                Reg   Size
//   ------------------- ----- ----
//   globalScalars       c39      1
//   stencil             c52      1
//   specularFactor      c66      1
//   specularColorFactor c72      1
//   specMapIntMask      c73      1
//   bumpiness           c74      1
//   TextureSampler      s0       1
//   BumpSampler         s1       1
//   SpecSampler         s2       1
//   StippleTexture      s10      1
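//
// Note: the hand-edited (unindented) code below additionally declares and samples
// s13 and reads c199.x; neither appears in the compiler-generated tables above.
//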

    ps_3_0
    // Literal constants used by the indented (compiler-generated) instructions.
    // c0: -1 / 0 / 1 selectors; c1: -0.5 bias, ~1e-5 epsilon, 0.5 scale, 1/512;
    // c2: 3.996 and 4 (alpha quantisation), 0.125 and 0.25 (stipple addressing).
    def c0, -1, 0, 1, 0
    def c1, -0.5, 9.99999975e-006, 0.5, 0.001953125
    def c2, 3.99600005, 4, 0.125, 0.25
// Constants used by the unindented detail-texture code.
// c150: only .x (0.0) is read, to initialise r10.zw.
def c150, 0.0, 1.0, 0.0, 0.0
// c175: .z/.w (2, -1) decode a value from [0,1] to [-1,1]; .y (0.5) is used as
// both scale and bias to encode it back.
def c175, 0.0, 0.5, 2.0, -1.0
//x - clamp level, y - bump level mix, z - specular inv mix, w - diffuse mix
// .x is used both as the mul_sat multiplier and as the pow exponent of the
// brightness mask; .y is referenced only from commented-out code.
def c177, 1.4, 4.0, 1.0, 0.35
//w - detail tiles count
// The base UV is multiplied by .w before sampling s13.
def c178, 0.0, 0.0, 0.0, 3.0
    // Interpolated inputs.
    dcl_texcoord v0.xy      // UV used for every material texture lookup
    dcl_texcoord1 v1.xyz    // v1/v2/v3 are combined with the decoded bump vector;
    dcl_texcoord4 v2.xyz    // presumably normal / tangent / bitangent - TODO confirm
    dcl_texcoord5 v3.xyz    // against the vertex shader
    dcl_color v4.xw         // .w multiplies the texture alpha, .x is copied to oC2.z
    dcl vPos.xy             // pixel position, used to address the stipple pattern
    // Samplers (names from the header table).
    dcl_2d s0               // TextureSampler
    dcl_2d s1               // BumpSampler
    dcl_2d s2               // SpecSampler
    dcl_2d s10              // StippleTexture
// s13 is not in the header table; it is sampled with the tiled UV by the detail code.
dcl_2d s13
    // r0 = diffuse texture sample (rgb + alpha).
    texld r0, v0, s0


//moved from below
// Specular map is fetched early so r12.x can be used by the detail code:
//   r12.w = spec.a * specularFactor (c66.x)
//   r12.x = dot(spec.rgb, specMapIntMask (c73))
texld r12, v0, s2
mul r12.w, r12.w, c66.x
dp3 r12.x, r12, c73


// Sample the detail texture (s13) at the base UV tiled c178.w times.
// r10.zw is zeroed first so that r10.w holds a defined value when the
// full-register mul/add below read it.
mov r10.zw, c150.x
mov r10.xy, v0
mul r10.xy, r10, c178.w
texld r11, r10, s13
// Remap all four channels from [0,1] to [-1,1]; r11.xyz is used later as a
// detail normal, r11.w as a signed detail intensity.
mad r11, r11, c175.z, c175.w //for normals
//mul r0.xyz, r0, r11
//mad r10.xyz, r11.w, c175.z, c175.w
// r10.xyz = 1 + c177.w * r11.w : a multiplier centred on 1.
mov r10.xyz, r11.w
//mul r0.xyz, r0, r10
mul r10, r10, c177.w
add r10, r10, -c175.w           // c175.w is -1, so this adds 1
//mul r2.xyz, r2, r10
//mul r0.xyz, r0, r10
mul r10, r0, r10                // r10 = diffuse modulated by the detail multiplier
mov r9.xyz, r0                  // r9.xyz = unmodified diffuse colour

//affect dark only
// r9.w = saturate(max(r, g, b) * c177.x) raised to the power c177.x.
// This brightness mask stays live and is reused by the bump blend later on.
max r9.w, r0.x, r0.y
max r9.w, r9.w, r0.z
mul_sat r9.w, r9.w, c177.x
//mul r9.w, r9.w, r9.w
//mul r9.w, r9.w, r9.w
pow r9.w, r9.w, c177.x
//mul_sat r9.w, r9.w, r12.x //affect specular level
// r0.rgb = mask * original + (1 - mask) * detailed; r0.w is left untouched.
lrp r0.xyz, r9.w, r9, r10



    // Alpha stipple test: the final alpha selects a tile of the stipple texture,
    // the pixel position selects a texel inside that tile, and the pixel is
    // discarded when the sampled value is not positive.
    // Final alpha = texture alpha * vertex colour alpha * globalScalars.x (c39.x).
    mul r0.w, r0.w, v4.w
    mul r0.w, r0.w, c39.x
    mov_sat r1.x, r0.w
    mul r1.x, r1.x, c2.x            // clamped alpha * 3.996
    frc r1.y, r1.x
    mul r1.z, r1.y, c2.y            // fractional part * 4
    frc r1.w, r1.z
    add r2.xy, r1.zxzw, -r1.wyzw    // r2.x = r1.z - frac(r1.z), r2.y = r1.x - frac(r1.x)
    mul r1.xy, c2.z, vPos           // pixel position * 0.125
    frc r1.xy, r1_abs               // fractional part of the absolute value
    cmp r1.xy, vPos, r1, -r1        // negate the result where vPos is negative
    mul r1.xy, r1, c2.w             // in-tile offset scaled by 0.25
    mad r1.xy, r2, c2.w, r1         // plus tile origin: r2 * 0.25
    texld r1, r1, s10
    cmp r1, -r1.y, c0.x, c0.y       // every component = -1 when sampled .y <= 0, otherwise 0
    texkill r1                      // discard the pixel when r1 is negative
    // r1 = bump map sample. Only r1.xyz is modified in this section; r1.w must
    // reach the channel-selection code that follows unchanged.
    texld r1, v0, s1

//apply to bump
/*
//restore normal, then convert back
mad r1.xyz, r1, c175.z, c175.w

mad r11.xyz, r11, c177.y, r1
mul r12.x, r12.x, c177.x
add_sat r9.w, r9.w, r12.x //affect specular level
lrp r1.xyz, r9.w, r1, r11

mad r1.xyz, r1, c175.y, c175.y
*/


//higher quality
// Decode the stored vector from [0,1] to [-1,1].
mad r1.xyz, r1, c175.z, c175.w

// Detail normal: scale its Z by c199.x, then renormalise into r11.xyz.
// NOTE(review): c199 has no def in this file and is not in the header register
// table, so its value must be supplied by the application - confirm it is set.
mul r10.z, r11.z, c199.x //amount of bump
mov r10.xy, r11.xy
nrm r11.xyz, r10.xyz

// Dot the detail normal with three component permutations of the base vector.
// The permutations are written as source swizzles instead of being copied into
// scratch registers first; the computed values are the same.
dp3 r10.y, r11.xyz, r1.xzy
dp3 r10.x, r11.xyz, r1.zyx
dp3 r10.z, r11.xyz, r1.yxz

//nrm r1.xyz, r10.xyz

nrm r11.xyz, r10.xyz
// Blend weight = saturate(brightness mask + specular intensity). The updated
// r9.w stays live after this section.
add_sat r9.w, r9.w, r12.x //affect specular level
// r1.xyz = weight * base + (1 - weight) * detail-perturbed vector.
lrp r1.xyz, r9.w, r1, r11
//nrm r1.xyz, r10.xyz

// Encode back to [0,1] so the decode that follows sees the format it expects.
mad r1.xyz, r1, c175.y, c175.y


//mov r13, r1

    // Choose which channels of the bump sample carry the two stored components:
    // (x, y) when x + w > 1, otherwise (w, y).
    add r1.z, -r1.w, c0.z           // 1 - w
    add r1.z, -r1.x, r1.z           // 1 - w - x
    cmp r1.z, r1.z, c0.w, c0.z      // 0 when 1 - w - x >= 0, otherwise 1
    lrp r2.xy, r1.z, r1, r1.wyzw    // r1.z = 1 -> (x, y); r1.z = 0 -> (w, y)
    add r1.xy, r2, c1.x             // subtract 0.5
    mul r1.xy, r1, c74.x            // scale by bumpiness
    dp2add r1.z, r2, -r2, c0.z      // 1 - dot(r2.xy, r2.xy)
    rsq r1.z, r1.z
    rcp r1.z, r1.z                  // rsq then rcp: square root of the value above
    // Combine with the interpolated basis vectors (presumably v1 = normal,
    // v2 = tangent, v3 = bitangent - TODO confirm against the vertex shader).
    mul r2.xyz, r1.x, v2
    mad r1.xzw, v1.xyyz, r1.z, r2.xyyz   // r1.(x,z,w) = v1.xyz * r1.z + v2.xyz * r1.x
    mad r1.xyz, r1.y, v3, r1.xzww        // r1.xyz = v3.xyz * r1.y + the sum above
    add r1.xyz, r1, c1.y            // add ~1e-5 to each component before normalising
    dp3 r1.w, r1, r1
    rsq r1.w, r1.w                  // r1.w = 1 / length(r1.xyz), applied when oC1 is written

// The specular map was sampled near the top of the shader into r12
// (r12.x = dot(spec.rgb, specMapIntMask), r12.w = spec.a * specularFactor).
// Copy it into r2, where the output code expects it. r12 is not written in
// this section, so r12.x remains the unmodified intensity throughout.
mov r2, r12
//    texld r2, v0, s2 //orig
//    mul r2.w, r2.w, c66.x
//    dp3 r2.x, r2, c73

/*
//v1
mul r11.w, r2.x, r11.w
lrp r2.x, r9.w, r2.x, r11.w
*/
/*
//v2 better
mul r11.w, r2.x, r11.w
mov r12, r2
lrp r2.x, r9.w, r11.w, r12.x
*/

//v3
// r11.w = intensity * signed detail value (the detail sample's .w remapped to [-1,1]).
mul r11.w, r2.x, r11.w
//mov r9.w, r2.x
//add_sat r9.w, r2.x, r2.x
// Blend weight = saturate(intensity * c177.z).
mul_sat r9.w, r2.x, c177.z
// r2.x = weight * original intensity + (1 - weight) * detail-modulated intensity.
lrp r2.x, r9.w, r12.x, r11.w
// The detail-modulated term can be negative, so clamp the result to [0,1].
mov_sat r2.x, r2.x


    // Write the four render targets.
    mul r2.x, r2.x, c72.x           // specular intensity * specularColorFactor
    mad r1.xyz, r1, r1.w, c0.z      // r1.xyz * (1 / length) + 1
    mul oC1.xyz, r1, c1.z           // * 0.5: unit vector encoded as n * 0.5 + 0.5
    mul oC2.x, r2.x, c1.z           // half of the scaled specular intensity
    mul r1.x, r2.w, c1.w            // r2.w (spec.a * specularFactor) / 512
    rsq r1.x, r1.x
    rcp oC2.y, r1.x                 // rsq then rcp: square root of the value above
    mov oC0, r0                     // colour (rgb) and final alpha
    mov oC1.w, r0.w
    mov oC2.z, v4.x                 // vertex colour .x passed through
    mov oC2.w, r0.w
    mov r0.zw, c0                   // r0.z = 1, r0.w = 0
    mul oC3, r0.zwww, c52.x         // oC3 = (stencil.x, 0, 0, 0)


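// approximately 50 instruction slots used (4 texture, 46 arithmetic)
// (figure from the original compiler output; it is out of date for the hand-edited
// code above, which issues 5 texld instructions and additional arithmetic)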
 